import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
import matplotlib.pyplot as plt
import plotly.express as px
from anndata import AnnData
from sklearn.neighbors import KernelDensity
import random
from matplotlib import pylab
import warnings
import scanpy.external as sce
import yaml
from matplotlib.colors import TwoSlopeNorm
import seaborn as sns
import os
import sys
import random
random.seed(1)
warnings.filterwarnings('ignore')
import anndata2ri
import rpy2.rinterface_lib.callbacks
import logging
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
anndata2ri.activate()
%load_ext rpy2.ipython
sc.settings.verbosity = 3
sc.logging.print_header()
nprocs=4
np.random.seed(0)
with open("../data/resources/rcParams.yaml") as f:
rcParamsDict = yaml.full_load(f)
for k in rcParamsDict["rcParams"]:
print("{} {}".format(k,rcParamsDict["rcParams"][k]))
plt.rcParams[k] = rcParamsDict["rcParams"][k]
for k1 in set(list(rcParamsDict)).difference(set(["rcParams"])):
print("{} {}".format(k1,rcParamsDict[k1]))
scanpy==1.8.0 anndata==0.8.0 umap==0.4.6 numpy==1.22.2 scipy==1.6.2 pandas==1.2.3 scikit-learn==0.24.1 statsmodels==0.13.5 python-igraph==0.9.1 louvain==0.7.0 leidenalg==0.8.3 figure.dpi 80 savefig.dpi 500 figure.figsize [10, 10] axes.facecolor white dotSize 20
outdir = "../data/output"
with open("../data/resources/iPSC_lines_map.yaml", 'r') as f:
iPSC_lines_map = yaml.load(f, Loader=yaml.FullLoader)["lines"]
colorsmap = dict(zip([i["newName"] for i in iPSC_lines_map.values()],[i["color"] for i in iPSC_lines_map.values()]))
figDir = "./figures"
if not os.path.exists(figDir):
# Create a new directory because it does not exist
os.makedirs(figDir)
figDir = "./figures"
if not os.path.exists(figDir):
# Create a new directory because it does not exist
os.makedirs(figDir)
#tradeSeqUtilsPath = "/opt/tradeSeq_utils.R"
tradeSeqUtilsPath = "/opt/tradeSeq_utils.R"
branchTag = "Exc"
endpoint = "endpoint_GlutamatergicNeurons_both"
markers = ["EOMES","TBR1","SLA","SATB2","GRIA3","NOVA1","NNAT","MKI67","HOPX","S100B","CYP26A1"]
BranchBCs = sc.read_h5ad(outdir+"/adatas/adataPaga.h5ad")
sc.pl.draw_graph(BranchBCs[BranchBCs.obs[endpoint] == 1], color = ["cellID_newName","leidenAnnotated","dataset"], size = rcParamsDict["dotSize"],add_outline=True, outline_width=(0.2, 0.05), vmin='p2',vmax='p98', wspace=.4, ncols=2)
BranchBCs = BranchBCs.obs_names[BranchBCs.obs[endpoint] == 1].tolist()
HVGs=pd.read_csv(outdir+"/HVG_list_intersection_Curated_"+branchTag+"_PagaTest.txt", sep = "\t")["HVG"].tolist()
harmonyTheta = 1
len(HVGs)
1856
adataBranch = sc.read_h5ad(outdir+'/adatas/MultiplexingPreprocessing_unscaled.h5ad')[BranchBCs]
adataBranch.obs["leidenAnnotated"] = sc.read_h5ad(outdir+"/adatas/ClusterAnnotated_Base_filt.h5ad")[adataBranch.obs_names].obs["leidenAnnotated"]
adataBranch.obs["leidenAnnotated"] = sc.read_h5ad(outdir+"/adatas/adataPaga.h5ad")[adataBranch.obs_names].obs["leidenAnnotated"]
adataBranch.uns["leidenAnnotated_colors"] = sc.read_h5ad(outdir+"/adatas/adataPaga.h5ad")[adataBranch.obs_names].uns["leidenAnnotated_colors"]
goodcls = adataBranch.obs.groupby(["leidenAnnotated"]).size()[adataBranch.obs.groupby(["leidenAnnotated"]).size() >= 50].index.tolist()
adataBranch = adataBranch[adataBranch.obs["leidenAnnotated"].isin(goodcls)]
adataBranch.layers["unscaled"] = adataBranch.X.copy()
sc.pp.normalize_total(adataBranch, target_sum=1e4)
sc.pp.log1p(adataBranch)
adataBranch.raw = adataBranch
adataBranch = adataBranch[:,HVGs]
adataBranch.var["highly_variable"] = True
sc.pp.regress_out(adataBranch, ['total_counts','pct_counts_mt'], n_jobs=4)
sc.pp.scale(adataBranch, zero_center=False)
#adataBranch.X = adataBranch.X + np.abs(adataBranch.X.min())
normalizing counts per cell
finished (0:00:00)
regressing out ['total_counts', 'pct_counts_mt']
sparse input is densified and may lead to high memory use
finished (0:00:05)
sc.tl.pca(adataBranch, svd_solver='arpack', use_highly_variable=True, random_state=0)
sc.pl.pca_variance_ratio(adataBranch)
sce.pp.harmony_integrate(adataBranch, 'dataset', max_iter_harmony=20,adjusted_basis="X_pca",random_state = 0, theta = harmonyTheta, lamb = 2)
#sc.pp.neighbors(adataBranch,use_rep="X_pca",n_neighbors=50, n_pcs=10, random_state=0)
sc.pp.neighbors(adataBranch,use_rep="X_pca",n_neighbors=60, n_pcs=9, random_state=0)
sc.tl.umap(adataBranch, random_state=0)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:01)
2023-07-24 09:22:21,866 - harmonypy - INFO - Iteration 1 of 20 2023-07-24 09:22:23,429 - harmonypy - INFO - Iteration 2 of 20 2023-07-24 09:22:24,966 - harmonypy - INFO - Converged after 2 iterations
computing neighbors
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:11)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:18)
sc.pl.pca(adataBranch, color = ["dataset","stage",'leidenAnnotated'], size = rcParamsDict["dotSize"],add_outline=True, outline_width=(0.2, 0.05), vmin='p2',vmax='p98',wspace=.4)
adataPaga = adataBranch.copy()
thresholdPagaEdges = 0.06
leidenres = .4
sc.tl.leiden(adataPaga, resolution=leidenres, key_added="leiden_"+str(leidenres), random_state=0)
adataPaga.obs.groupby("leiden_"+str(leidenres)).size()
sc.tl.paga(adataPaga, groups="leiden_"+str(leidenres), use_rna_velocity=False)
## Nuclei coordinates creation for paga
groups="leiden_"+str(leidenres)
basis="X_umap"
if 'embeddings' in globals():
del embeddings
for l in adataPaga.obs[groups].cat.categories:
test=adataPaga.obsm[basis][adataPaga.obs[groups] == l]
x=np.mean(test[:,0])
y=np.mean(test[:,1])
if not 'embeddings' in globals():
embeddings = np.array([x,y])
else:
embeddings=np.vstack([embeddings,np.array([x,y])])
sc.pl.umap(adataPaga, color = ["leidenAnnotated","leiden_"+str(leidenres)],ncols=2,
size = rcParamsDict["dotSize"],add_outline=True, outline_width=(0.2, 0.05), vmin='p2',vmax='p98', frameon=False, legend_loc="right margin",wspace =.8)
sc.pl.paga(adataPaga, pos=embeddings, max_edge_width=2, threshold = thresholdPagaEdges, color = "stage", node_size_scale=4, save="Paga.svg")
#[22, 33, 40, 37, 9, 12, 20, 31, 6, 36, 3, 29]
## Shortest paths
# Setting terminal states from leiden + annotation
endPointsMap = pd.crosstab(adataPaga.obs["leidenAnnotated"],adataPaga.obs["leiden_"+str(leidenres)]) / pd.crosstab(adataPaga.obs["leidenAnnotated"],adataPaga.obs["leiden_"+str(leidenres)]).sum(axis = 0)
endPointsMap = endPointsMap.idxmax().to_dict()
endPointsMap = dict((v,int(k)) for k,v in endPointsMap.items() if v in ["GlutamatergicNeurons_late","GlutamatergicNeurons_early"])
endPointsMap
#endPointsMap = {
# "GlutamatergicNeurons_early":3,
#"GlutamatergicNeurons_late":1,
#}
TOP2Aadata = adataPaga[:,"TOP2A"].copy()
TOP2Aadata.obs["TOP2A"] = TOP2Aadata.X.flatten()
root = int(TOP2Aadata.obs.groupby("leiden_"+str(leidenres))["TOP2A"].mean().idxmax())
del TOP2Aadata
# Setting terminal states from leiden + annotation
import networkx as nx
testGraph = nx.from_numpy_matrix(adataPaga.uns["paga"]["connectivities"].todense(), parallel_edges=False, create_using=None )
#Here we manually remove edge between Early and Late endpoints to avoid closed loop due to their similarity
testGraph.remove_edge(endPointsMap["GlutamatergicNeurons_late"], endPointsMap["GlutamatergicNeurons_early"])
testGraph.remove_edges_from([(n1, n2) for n1, n2, w in testGraph.edges(data="weight") if w < thresholdPagaEdges])
for t in testGraph.edges(data="weight"):
testGraph[t[0]][t[1]]["weight"] = 1-t[2]
for endpoint in list(endPointsMap.keys()):
sp = nx.shortest_path(testGraph, source=root, target=endPointsMap[endpoint], weight="weight", method='bellman-ford')
print(sp)
#adataNeurons_vel.obs["endpoint_"+endpoint] = False
adataPaga.obs["endpoint_"+str(endpoint)] = adataPaga.obs["leiden_"+str(leidenres)].astype(int).isin(sp).astype(int)
sc.pl.umap(adataPaga, color = ["leidenAnnotated","leiden_"+str(leidenres),"endpoint_"+str(endpoint),"stage"],ncols=2,
size = rcParamsDict["dotSize"],add_outline=True, outline_width=(0.2, 0.05), vmin='p2',vmax='p98', frameon=False, wspace=.4, legend_loc="right margin")
running Leiden clustering
finished: found 6 clusters and added
'leiden_0.4', the cluster labels (adata.obs, categorical) (0:00:01)
running PAGA
finished: added
'paga/connectivities', connectivities adjacency (adata.uns)
'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)
--> added 'pos', the PAGA positions (adata.uns['paga']) WARNING: saving figure to file figures/pagaPaga.svg
[2, 3, 1]
[2, 5, 0, 4]
RootName = adataPaga.obs_names[adataPaga[:, ["TOP2A"]].X.argmax()]
root_idx = np.where(adataPaga.obs_names == RootName)[0][0]
adataPaga.uns['iroot'] = root_idx
sc.tl.diffmap(adataPaga)
sc.tl.dpt(adataPaga)
sc.pl.diffmap(adataPaga, color = ["dpt_pseudotime","endpoint_"+list(endPointsMap.keys())[1],"endpoint_"+list(endPointsMap.keys())[0]],size = rcParamsDict["dotSize"],add_outline=True, outline_width=(0.2, 0.05), vmin='p2',vmax='p98', frameon=False, wspace=.3, legend_loc="on data")
computing Diffusion Maps using n_comps=15(=n_dcs)
computing transitions
finished (0:00:00)
eigenvalues of transition matrix
[1. 0.99498755 0.988162 0.98487335 0.98246974 0.9682833
0.9659444 0.9648555 0.9537523 0.9417448 0.93550366 0.9344169
0.9280085 0.9246718 0.91359776]
finished: added
'X_diffmap', diffmap coordinates (adata.obsm)
'diffmap_evals', eigenvalues of transition matrix (adata.uns) (0:00:00)
computing Diffusion Pseudotime using n_dcs=10
finished: added
'dpt_pseudotime', the pseudotime (adata.obs) (0:00:00)
adataPaga = adataPaga[adataPaga.obs["endpoint_GlutamatergicNeurons_early"] + adataPaga.obs["endpoint_GlutamatergicNeurons_late"] > 0]
adataPaga[adataPaga.obs["endpoint_GlutamatergicNeurons_early"] == 1].write_h5ad(outdir+"/adatas/AdataLineage.Exc_early.h5ad")
adataPaga[adataPaga.obs["endpoint_GlutamatergicNeurons_late"] == 1].write_h5ad(outdir+"/adatas/AdataLineage.Exc_late.h5ad")
#adataPaga = sc.read_h5ad(outdir+"/adatas/AdataLineage."+branchTag+".h5ad")
#del adataPaga
lineage1 = ["endpoint_GlutamatergicNeurons_early"]
lineage2 = ["endpoint_GlutamatergicNeurons_late"]
lineageOrder = ["endpoint_GlutamatergicNeurons_early","endpoint_GlutamatergicNeurons_late"]
obs = adataPaga.obs
obsm = adataPaga.obsm
#adataTradeseq = sc.read_h5ad(outdir+'/adatas/MultiplexingPreprocessing_unscaled.h5ad')[obs.index,HVGs]
adataTradeseq = sc.read_h5ad(outdir+'/adatas/MultiplexingPreprocessing_unscaled.h5ad')[obs.index]
ExtraNormGenes = random.sample(adataTradeseq.var_names[~adataTradeseq.var_names.isin(HVGs)].tolist(), len(HVGs))
adataTradeseq = adataTradeseq[:,HVGs+ExtraNormGenes]
adataTradeseq.uns["leidenAnnotated_colors"] = adataBranch[adataTradeseq.obs_names].uns["leidenAnnotated_colors"]
adataTradeseq.obs = obs.loc[adataTradeseq.obs_names]
adataTradeseq = adataTradeseq[:,~adataTradeseq.var_names.str.startswith('MT-')]
adataTradeseq = adataTradeseq[:,~adataTradeseq.var_names.str.startswith(("RPS","RPL"))]
adataTradeseq.obsm = obsm
# we store info about lineages to balance among them
adataTradeseq.obs["Trajectories"] = np.nan
adataTradeseq.obs["Trajectories"] = np.where(adataTradeseq.obs[lineage1] == 1, lineage1[0],lineage2[0] )
adataTradeseq.obs["Trajectories"] = np.where((adataTradeseq.obs[lineage1[0]] + adataTradeseq.obs[lineage2[0]]) == 2,'Common',adataTradeseq.obs["Trajectories"] )
#Trim extreme values of DPT (for potentially low amount of cells)
trimExtremes = [0.01,.99]
minDPT, maxDPT = np.quantile(adataTradeseq.obs["dpt_pseudotime"], trimExtremes)
adataTradeseq = adataTradeseq[(adataTradeseq.obs["dpt_pseudotime"] > minDPT) & (adataTradeseq.obs["dpt_pseudotime"] < maxDPT)]
## TrimPseudotime extremes and keep common cells
adataTradeseq.obs["dpt_bin"] = pd.cut(adataTradeseq.obs["dpt_pseudotime"], 100)
CommonCells = adataTradeseq.obs_names[adataTradeseq.obs["Trajectories"] == 'Common'].tolist()
pd.crosstab(adataTradeseq.obs["Trajectories"],adataTradeseq.obs["dpt_bin"])
| dpt_bin | (0.0448, 0.0548] | (0.0548, 0.0638] | (0.0638, 0.0729] | (0.0729, 0.0819] | (0.0819, 0.091] | (0.091, 0.1] | (0.1, 0.109] | (0.109, 0.118] | (0.118, 0.127] | (0.127, 0.136] | ... | (0.861, 0.87] | (0.87, 0.879] | (0.879, 0.888] | (0.888, 0.897] | (0.897, 0.906] | (0.906, 0.915] | (0.915, 0.924] | (0.924, 0.933] | (0.933, 0.942] | (0.942, 0.951] |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Trajectories | |||||||||||||||||||||
| Common | 40 | 33 | 17 | 23 | 16 | 27 | 15 | 12 | 18 | 14 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| endpoint_GlutamatergicNeurons_early | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 127 | 116 | 124 | 109 | 87 | 91 | 70 | 64 | 35 | 42 |
| endpoint_GlutamatergicNeurons_late | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 99 | 60 | 43 | 15 | 12 | 2 | 1 | 0 | 0 | 0 |
3 rows × 100 columns
# Define subsampling rates
minVals = pd.crosstab(adataTradeseq.obs["Trajectories"],adataTradeseq.obs["dpt_bin"]).loc[lineageOrder].min()
bclist = []
for dptbin in minVals.index.tolist():
for i in lineageOrder:
BINcells = adataTradeseq.obs_names[(adataTradeseq.obs["dpt_bin"] == dptbin) & (adataTradeseq.obs["Trajectories"] == i)].tolist()
bclist.extend(random.sample(BINcells, minVals.loc[dptbin]))
adataTradeseq = adataTradeseq[bclist+CommonCells]
adataTradeseq = adataTradeseq[:,adataTradeseq.X.sum(axis = 0) > 0]
ExpressedGenes = adataTradeseq.var_names.tolist()
#We keep only genes with at least 1 count in each trajectory
adataTradeseq = adataTradeseq[:,(adataTradeseq.X.todense().T.dot(pd.get_dummies(adataTradeseq.obs["Trajectories"])) > 0).sum(axis = 1) ==3]
sc.pl.umap(adataTradeseq, color = ["dpt_pseudotime","Trajectories",lineage1[0],lineage2[0]],size = 50
,add_outline=True, outline_width=(0.2, 0.05), vmin='p2',vmax='p98', frameon=False, wspace=.3, legend_loc="on data")
cellWeights = adataTradeseq.obs[lineageOrder]
cellWeights = np.matrix(cellWeights.to_numpy())
pseudotime = adataTradeseq.obs["dpt_pseudotime"]
pseudotime = pd.DataFrame({lineageOrder[0]:pseudotime,lineageOrder[1]:pseudotime})
pseudotime = np.matrix(pseudotime.to_numpy())
# Transpose needed!
counts = adataTradeseq.X.T.todense()
counts = counts + abs(counts.min())
varnames = adataTradeseq.var_names.tolist()
import anndata2ri
import rpy2
anndata2ri.activate()
%load_ext rpy2.ipython
The rpy2.ipython extension is already loaded. To reload it, use: %reload_ext rpy2.ipython
%%R -i tradeSeqUtilsPath
source(tradeSeqUtilsPath)
%%R -i cellWeights -i pseudotime -i counts -i varnames
set.seed(8)
library(tradeSeq)
library(ggplot2)
w <- as.matrix(cellWeights)
dpt <- as.matrix(pseudotime)
cMatrix <- as.matrix(counts)
gamObj <- fitGAM(cMatrix, verbose = TRUE, pseudotime = dpt, cellWeights = w, nknots = 6, sce=FALSE)
names(gamObj) <- varnames
| | 0 % ~calculating |+ | 1 % ~13m 16s |+ | 2 % ~14m 09s |++ | 3 % ~13m 37s |++ | 4 % ~13m 03s |+++ | 5 % ~12m 54s |+++ | 6 % ~12m 58s |++++ | 7 % ~12m 52s |++++ | 8 % ~12m 39s |+++++ | 9 % ~12m 34s |+++++ | 10% ~12m 23s |++++++ | 11% ~12m 20s |++++++ | 12% ~12m 08s |+++++++ | 13% ~11m 51s |+++++++ | 14% ~11m 36s |++++++++ | 15% ~11m 23s |++++++++ | 16% ~11m 12s |+++++++++ | 17% ~11m 07s |+++++++++ | 18% ~10m 58s |++++++++++ | 19% ~10m 53s |++++++++++ | 20% ~10m 41s |+++++++++++ | 21% ~10m 30s |+++++++++++ | 22% ~10m 22s |++++++++++++ | 23% ~10m 15s |++++++++++++ | 24% ~10m 05s |+++++++++++++ | 25% ~09m 56s |+++++++++++++ | 26% ~09m 51s |++++++++++++++ | 27% ~09m 43s |++++++++++++++ | 28% ~09m 32s |+++++++++++++++ | 29% ~09m 24s |+++++++++++++++ | 30% ~09m 16s |++++++++++++++++ | 31% ~09m 07s |++++++++++++++++ | 32% ~08m 59s |+++++++++++++++++ | 33% ~08m 49s |+++++++++++++++++ | 34% ~08m 40s |++++++++++++++++++ | 35% ~08m 34s |++++++++++++++++++ | 36% ~08m 26s |+++++++++++++++++++ | 37% ~08m 18s |+++++++++++++++++++ | 38% ~08m 11s |++++++++++++++++++++ | 39% ~08m 02s |++++++++++++++++++++ | 40% ~07m 55s |+++++++++++++++++++++ | 41% ~07m 46s |+++++++++++++++++++++ | 42% ~07m 40s |++++++++++++++++++++++ | 43% ~07m 31s |++++++++++++++++++++++ | 44% ~07m 24s |+++++++++++++++++++++++ | 45% ~07m 15s |+++++++++++++++++++++++ | 46% ~07m 07s |++++++++++++++++++++++++ | 47% ~06m 59s |++++++++++++++++++++++++ | 48% ~06m 53s |+++++++++++++++++++++++++ | 49% ~06m 45s |+++++++++++++++++++++++++ | 50% ~06m 37s |++++++++++++++++++++++++++ | 51% ~06m 29s |++++++++++++++++++++++++++ | 52% ~06m 21s |+++++++++++++++++++++++++++ | 53% ~06m 13s |+++++++++++++++++++++++++++ | 54% ~06m 05s |++++++++++++++++++++++++++++ | 55% ~05m 57s |++++++++++++++++++++++++++++ | 56% ~05m 49s |+++++++++++++++++++++++++++++ | 57% ~05m 40s |+++++++++++++++++++++++++++++ | 58% ~05m 32s |++++++++++++++++++++++++++++++ | 59% ~05m 24s |++++++++++++++++++++++++++++++ | 60% ~05m 16s |+++++++++++++++++++++++++++++++ | 61% ~05m 08s |+++++++++++++++++++++++++++++++ | 62% ~04m 60s |++++++++++++++++++++++++++++++++ | 63% ~04m 52s |++++++++++++++++++++++++++++++++ | 64% ~04m 44s |+++++++++++++++++++++++++++++++++ | 65% ~04m 37s |+++++++++++++++++++++++++++++++++ | 66% ~04m 29s |++++++++++++++++++++++++++++++++++ | 67% ~04m 25s |++++++++++++++++++++++++++++++++++ | 68% ~04m 17s |+++++++++++++++++++++++++++++++++++ | 69% ~04m 09s |+++++++++++++++++++++++++++++++++++ | 70% ~04m 01s |++++++++++++++++++++++++++++++++++++ | 71% ~03m 53s |++++++++++++++++++++++++++++++++++++ | 72% ~03m 46s |+++++++++++++++++++++++++++++++++++++ | 73% ~03m 38s |+++++++++++++++++++++++++++++++++++++ | 74% ~03m 30s |++++++++++++++++++++++++++++++++++++++ | 75% ~03m 22s |++++++++++++++++++++++++++++++++++++++ | 76% ~03m 14s |+++++++++++++++++++++++++++++++++++++++ | 77% ~03m 07s |+++++++++++++++++++++++++++++++++++++++ | 78% ~02m 59s |++++++++++++++++++++++++++++++++++++++++ | 79% ~02m 51s |++++++++++++++++++++++++++++++++++++++++ | 80% ~02m 43s |+++++++++++++++++++++++++++++++++++++++++ | 81% ~02m 35s |+++++++++++++++++++++++++++++++++++++++++ | 82% ~02m 27s |++++++++++++++++++++++++++++++++++++++++++ | 83% ~02m 19s |++++++++++++++++++++++++++++++++++++++++++ | 84% ~02m 11s |+++++++++++++++++++++++++++++++++++++++++++ | 85% ~02m 03s |+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01m 55s |++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01m 47s |++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01m 39s |+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01m 31s |+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01m 23s |++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01m 15s |++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01m 07s |+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~58s |+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~50s |++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~42s |++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~33s |+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~25s |+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~17s |++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~08s |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=13m 47s
%%R
source(tradeSeqUtilsPath)
%%R
checkFit = function(x){
length(x) > 1
}
bolMask <- unlist(lapply(gamObj, FUN = checkFit))
%%R
gamObj <- gamObj[bolMask]
pylab.rcParams['figure.figsize'] = (2, 2)
%%R -i lineageOrder -o TopTransientsExpr -o TopTransients -o compare -o patternResSS -o endRes -o patternRes
patternRes <- patternTest(gamObj)
patternResSS <- na.omit(patternRes[patternRes$pvalue < 0.01,])
#patternRes$fdr <- p.adjust(patternRes$pvalue, method = "fdr", n = length(patternRes$pvalue))
#patternResSS <- na.omit(patternRes[patternRes$fdr < 0.01,])
endRes <- diffEndTest(gamObj)
#endRes <- endRes[endRes$pvalue == 0,]
patternResSS$Gene <- rownames(patternResSS)
patternResSS$pattern <- patternResSS$fcMedian
patternResT <- patternResSS[, c("Gene", "pattern")]
endRes$Gene <- rownames(endRes)
endRes$end <- endRes$waldStat
endResT <- endRes[, c("Gene", "end")]
compare <- merge(patternResT, endResT, by = "Gene", all = FALSE)
compare$transientScore <-
rank(-compare$end, ties.method = "min") + rank(compare$pattern, ties.method = "random")
TopTransients <- tail(compare[order(compare$transientScore),],500)
datalist = c()
for (g in TopTransients$Gene){
localModel <- gamObj[[g]]
data <- localModel$model
y <- data$y
nCurves <- length(localModel$smooth)
for (jj in seq_len(nCurves)) {
df <- .getPredictRangeDf(localModel$model, jj, nPoints = 100)
yhat <- predict(localModel, newdata = df, type = "response")
Newframe <- data.frame("fittedCounts" = yhat)
colnames(Newframe) <- c(paste0("fittedCounts.",g,".",replace(as.character(jj), as.character(jj), lineageOrder[jj])[[2]]))
datalist[[paste(g,jj)]] <- Newframe
}
}
TopTransientsExpr = do.call(cbind, datalist)
leidenOrder = ["ProliferatingProgenitors", 'RadialGliaProgenitors', 'Neurons', 'GlutamatergicNeurons_early','GlutamatergicNeurons_late']
leidenCols = dict(zip(adataBranch.obs.leidenAnnotated.cat.categories, adataBranch.uns["leidenAnnotated_colors"]))
#leidenCols = dict(zip(adataTradeseq.obs.leidenAnnotated.cat.categories, adataTradeseq.uns["leidenAnnotated_colors"]))
#Curatedmarkerslist = CuratedmarkersDF.loc[CuratedmarkersDF.score > 1,"name"].tolist()
#CuratedmarkersDF = pd.read_csv(markers, header=None, sep = "\t", names=["name","area","score"])
#Curatedmarkerslist = CuratedmarkersDF.loc[CuratedmarkersDF.score >= 1,"name"].tolist()
##############################
###################à
# We re-set index according to originally provided dpt range
TopTransientsExpr.index = np.linspace(minDPT, maxDPT,TopTransientsExpr.shape[0])
for m in TopTransients.tail(30).Gene.unique():
plt.rcdefaults()
sns.set_style("ticks", {'axes.grid' : False})
fig, axes = plt.subplots(2,1, figsize=(6,6),sharex=True,
gridspec_kw={'height_ratios': [1, 3]})
plt.title(m)
#fig, axes = plt.subplots(2,1, figsize=(10,5),sharex=True,
# gridspec_kw={'height_ratios': [1, 2]})
#sns.histplot(data=adataTradeseq.obs, x="dpt_pseudotime", hue="leidenAnnotated", ax=axes[0])
# sns.histplot(data=adataTradeseq.obs, x="dpt_pseudotime", hue="leidenAnnotated", ax=axes[0],bins=100,
# palette=dict(zip(adata.obs.leidenAnnotated.cat.categories, adata.uns["leidenAnnotated_colors"])), alpha=.5)
sns.boxplot(data=adataTradeseq.obs, x="dpt_pseudotime", y="leidenAnnotated", ax = axes[0],
palette=leidenCols,
showfliers=False,order=leidenOrder)
#axes[0].set(xlabel=None)
#axes[0].tick_params(bottom=False)
#axes[0].set(xticklabels=[])
#axes[0].legend(title='Smoker', bbox_to_anchor=(1.05, 1))
#for m in ["S100A13","LMO3"]:
cList = [c for c in TopTransientsExpr.columns.tolist() if "."+m+"." in c]
if len(cList ) > 0:
SmoothedExpr = TopTransientsExpr[cList].copy()
SmoothedExpr["pseudotime"] = SmoothedExpr.index.tolist()
SmoothedExpr["pseudotime"] = np.linspace(minDPT,maxDPT, 100)
SmoothedExpr = SmoothedExpr.melt("pseudotime")
TerminalLeiden = SmoothedExpr['variable'].str.split('_', expand = True)[[1,2]]
SmoothedExpr['TerminalLeiden'] = TerminalLeiden[1]+'_'+TerminalLeiden[2]
#TrimExtremes
sns.lineplot(data=SmoothedExpr, x="pseudotime", y="value",palette=leidenCols,
hue= "TerminalLeiden", linewidth=2, ax=axes[1])
#axes[1].lines[1].set_linestyle("--")
dptAxis = np.linspace(SmoothedExpr.pseudotime.min(), SmoothedExpr.pseudotime.max(), 5).round(2)
ylimits = np.array([SmoothedExpr.value.min()-(abs(SmoothedExpr.value.min())*.5),
SmoothedExpr.value.max()+(SmoothedExpr.value.max()*.2)]).round(2)
exprAxis = np.linspace(ylimits.min(), ylimits.max(),5).round(2)
axes[1].set_xlim([dptAxis.min(), dptAxis.max()])
axes[1].set_ylim(ylimits)
axes[0].yaxis.tick_right()
axes[0].spines[['left', 'top','bottom']].set_visible(False)
axes[0].spines['right'].set_linewidth(1.5)
axes[0].tick_params(axis='y', which='major', labelsize=15, width=1.5, length=10)
axes[0].tick_params(axis='x', which='major', labelsize=15, width=0, length=10)
axes[0].set_xlabel("", visible=False)
axes[1].set_xticks(dptAxis)
axes[1].set_yticks(exprAxis)
axes[1].spines['left'].set_linewidth(1.5)
axes[1].spines['bottom'].set_linewidth(1.5)
axes[1].tick_params(axis='both', which='major', labelsize=15, width=1.5, length=10)
axes[1].spines[['right', 'top']].set_visible(False)
axes[1].xaxis.tick_bottom()
axes[1].yaxis.tick_left()
axes[1].legend(title='Gene', bbox_to_anchor=(1, .7))
sns.despine(ax=axes[1], top=True, right=True, left=False, bottom=False, offset=10, trim=False)
sns.despine(ax=axes[0], top=True, right=False, left=False, bottom=True, offset=10, trim=False)
axes[0].yaxis.set_label_position("right")
axes[0].spines[['left', 'top','bottom']].set_visible(False)
axes[1].set(xlabel='dpt pseudotime', ylabel='Smoothed log expression')
fig.show()
#fig.savefig(figDir+'/Deltas.pdf',bbox_inches='tight')
#Curatedmarkerslist = CuratedmarkersDF.loc[CuratedmarkersDF.score > 1,"name"].tolist()
#CuratedmarkersDF = pd.read_csv(markers, header=None, sep = "\t", names=["name","area","score"])
#Curatedmarkerslist = CuratedmarkersDF.loc[CuratedmarkersDF.score >= 1,"name"].tolist()
##############################
###################à
# We re-set index according to originally provided dpt range
TopTransientsExpr.index = np.linspace(minDPT, maxDPT,TopTransientsExpr.shape[0])
#for m in TopTransients.Gene.unique():
for m in ["EOMES"]:
plt.rcdefaults()
sns.set_style("ticks", {'axes.grid' : False})
fig, axes = plt.subplots(2,1, figsize=(6,6),sharex=True,
gridspec_kw={'height_ratios': [1, 3]})
plt.title(m)
#fig, axes = plt.subplots(2,1, figsize=(10,5),sharex=True,
# gridspec_kw={'height_ratios': [1, 2]})
#sns.histplot(data=adataTradeseq.obs, x="dpt_pseudotime", hue="leidenAnnotated", ax=axes[0])
# sns.histplot(data=adataTradeseq.obs, x="dpt_pseudotime", hue="leidenAnnotated", ax=axes[0],bins=100,
# palette=dict(zip(adata.obs.leidenAnnotated.cat.categories, adata.uns["leidenAnnotated_colors"])), alpha=.5)
sns.boxplot(data=adataTradeseq.obs, x="dpt_pseudotime", y="leidenAnnotated", ax = axes[0],
palette=leidenCols,
showfliers=False,order=leidenOrder)
#axes[0].set(xlabel=None)
#axes[0].tick_params(bottom=False)
#axes[0].set(xticklabels=[])
#axes[0].legend(title='Smoker', bbox_to_anchor=(1.05, 1))
#for m in ["S100A13","LMO3"]:
cList = [c for c in TopTransientsExpr.columns.tolist() if "."+m+"." in c]
if len(cList ) > 0:
SmoothedExpr = TopTransientsExpr[cList].copy()
SmoothedExpr["pseudotime"] = SmoothedExpr.index.tolist()
SmoothedExpr["pseudotime"] = np.linspace(minDPT,maxDPT, 100)
SmoothedExpr = SmoothedExpr.melt("pseudotime")
TerminalLeiden = SmoothedExpr['variable'].str.split('_', expand = True)[[1,2]]
SmoothedExpr['TerminalLeiden'] = TerminalLeiden[1]+'_'+TerminalLeiden[2]
#TrimExtremes
sns.lineplot(data=SmoothedExpr, x="pseudotime", y="value",palette=leidenCols,
hue= "TerminalLeiden", linewidth=2, ax=axes[1])
#axes[1].lines[1].set_linestyle("--")
dptAxis = np.linspace(SmoothedExpr.pseudotime.min(), SmoothedExpr.pseudotime.max(), 5).round(2)
ylimits = np.array([SmoothedExpr.value.min()-abs(((SmoothedExpr.value.min()*.5)+.1)),
SmoothedExpr.value.max()+abs((SmoothedExpr.value.max()*.1))]).round(2)
exprAxis = np.linspace(ylimits.min(), ylimits.max(),5).round(2)
axes[1].set_xlim([dptAxis.min(), dptAxis.max()])
axes[1].set_ylim(ylimits)
axes[0].yaxis.tick_right()
axes[0].spines[['left', 'top','bottom']].set_visible(False)
axes[0].spines['right'].set_linewidth(1.5)
axes[0].tick_params(axis='y', which='major', labelsize=15, width=1.5, length=10)
axes[0].tick_params(axis='x', which='major', labelsize=15, width=0, length=10)
axes[0].set_xlabel("", visible=False)
axes[1].set_xticks(dptAxis)
axes[1].set_yticks(exprAxis)
axes[1].spines['left'].set_linewidth(1.5)
axes[1].spines['bottom'].set_linewidth(1.5)
axes[1].tick_params(axis='both', which='major', labelsize=15, width=1.5, length=10)
axes[1].spines[['right', 'top']].set_visible(False)
axes[1].xaxis.tick_bottom()
axes[1].yaxis.tick_left()
axes[1].legend(title='Gene', bbox_to_anchor=(1, .7))
sns.despine(ax=axes[1], top=True, right=True, left=False, bottom=False, offset=10, trim=False)
sns.despine(ax=axes[0], top=True, right=False, left=False, bottom=True, offset=10, trim=False)
axes[0].yaxis.set_label_position("right")
axes[0].spines[['left', 'top','bottom']].set_visible(False)
axes[1].set(xlabel='dpt pseudotime', ylabel='Smoothed log expression')
fig.show()
fig.savefig(figDir+'/EOMESsmoothed.pdf',bbox_inches='tight')